Code for the single cell mouse analysis from CD45+ cells. Rivera et.al.

Script for analysing single cell RNA-seq data from Neutrophil mouse study # experimental arms #

short names:

c1 - PBS

c2 - CXCR2

c3 - BTZ-Dex

c4 - Triple

Hypothesis: Bor, CXCR, + DEX increase anti-tumor immune response - more (activated???) T-cells in this cohort.

library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
## 
## Attaching package: 'SeuratObject'
## The following objects are masked from 'package:base':
## 
##     intersect, t
library(magrittr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::extract()   masks magrittr::extract()
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ purrr::set_names() masks magrittr::set_names()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.22.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##     genomic data. Bioinformatics 2016.
## 
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(glmGamPoi)
## 
## Attaching package: 'glmGamPoi'
## 
## The following object is masked from 'package:dplyr':
## 
##     vars
## 
## The following object is masked from 'package:ggplot2':
## 
##     vars
library(RColorBrewer)
library(tibble)
library(ggplot2)
library(dplyr)
library(cowplot)
## 
## Attaching package: 'cowplot'
## 
## The following object is masked from 'package:lubridate':
## 
##     stamp
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ComplexHeatmap':
## 
##     add_heatmap
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(dplyr)
library(monocle3)
## Loading required package: Biobase
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## 
## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union
## 
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## 
## The following object is masked from 'package:SeuratObject':
## 
##     intersect
## 
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## 
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, saveRDS, setdiff,
##     table, tapply, union, unique, unsplit, which.max, which.min
## 
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## 
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## 
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## 
## Attaching package: 'MatrixGenerics'
## 
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## 
## The following object is masked from 'package:Biobase':
## 
##     rowMedians
## 
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## 
## The following object is masked from 'package:plotly':
## 
##     rename
## 
## The following objects are masked from 'package:lubridate':
## 
##     second, second<-
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## 
## The following object is masked from 'package:tidyr':
## 
##     expand
## 
## The following object is masked from 'package:utils':
## 
##     findMatches
## 
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## 
## The following object is masked from 'package:plotly':
## 
##     slice
## 
## The following object is masked from 'package:lubridate':
## 
##     %within%
## 
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## 
## The following object is masked from 'package:purrr':
## 
##     reduce
## 
## The following object is masked from 'package:sp':
## 
##     %over%
## 
## Loading required package: GenomeInfoDb
## 
## Attaching package: 'GenomicRanges'
## 
## The following object is masked from 'package:magrittr':
## 
##     subtract
## 
## 
## Attaching package: 'SummarizedExperiment'
## 
## The following object is masked from 'package:Seurat':
## 
##     Assays
## 
## The following object is masked from 'package:SeuratObject':
## 
##     Assays
## 
## 
## Attaching package: 'monocle3'
## 
## The following objects are masked from 'package:Biobase':
## 
##     exprs, fData, fData<-, pData, pData<-
library(devtools)
## Loading required package: usethis
library(SeuratWrappers)
library(EnhancedVolcano)
## Loading required package: ggrepel
library(stringr)
library(patchwork)
## 
## Attaching package: 'patchwork'
## 
## The following object is masked from 'package:cowplot':
## 
##     align_plots
#####                                            #####
#####.  If you are curious about my parameters   #####
#####   



#####  
#### c1 #### 
# c1.dat = Read10X(data.dir = "/samurlab2/RawData/RoswellMouse/RS-04392810_PBS_RS-04390274_count/outs/filtered_feature_bc_matrix/")
# c1 = CreateSeuratObject(c1.dat)
# 
# c1
# c1[["percent.mt"]] = PercentageFeatureSet(c1, pattern = "^mt-")
# c1[["percent.mt"]]
# 
# VlnPlot(c1, features = c("nFeature_RNA", "nCount_RNA","percent.mt"), ncol = 3, pt.size = 0, cols = RColorBrewer::brewer.pal(n = 3, name = "Spectral"))
# 
# plot1 = FeatureScatter(c1, feature1 = "nCount_RNA", feature2 = "percent.mt")
# plot2 = FeatureScatter(c1, feature1 = "nCount_RNA", feature2 = "nFeature_RNA",group.by = "sample")
# plot1+plot2  
# 
# 
# meta = c1@meta.data 
# meta$cells = rownames(meta)
# 
# meta = meta %>%
#   dplyr::rename(seq_folder = orig.ident,
#                 nUMI = nCount_RNA,
#                 nGene = nFeature_RNA)
# View(meta)
# 
# #number of cells included in sample
# meta %>% 
#   ggplot(aes(x=nGene, fill = seq_folder)) + 
#   geom_bar()+
#   theme_classic()+
#   theme(axis.text.x =  element_text(angle=45, vjust = 1, hjust = 1))+
#   theme(plot.title=element_text(hjust=0.5, face="bold"))+
#   ggtitle("Number of Cells")
# 
# # Visualize the number UMIs/transcripts per cell to search for outliers
# meta %>% 
#   ggplot(aes(color=seq_folder, x=nUMI, fill= seq_folder)) + 
#   geom_density(alpha = 0.2) + 
#   scale_x_log10() + 
#   theme_classic() +
#   ylab("Cell density") +
#   geom_vline(xintercept = 8361.788)
# 
# #visualize the distribution of genes detected per cell via histogram 
# meta %>% 
#   ggplot(aes(color=sample, x=nGene, fill= sample)) + 
#   geom_density(alpha = 0.2) + 
#   theme_classic() +
#   scale_x_log10() + 
#   geom_vline(xintercept = 1184)+
#   scale_x_continuous(limits=c(100,5000))
# 
# 
# c1 = subset(c1, subset = nFeature_RNA>300 & nFeature_RNA<8000 & percent.mt <5)
# c1 = NormalizeData(c1, normalization.method = "LogNormalize", scale.factor = 1e4)
# 
# c1 = FindVariableFeatures(c1, selection.method = "vst", nfeatures = 2000)
# c1.10 = head(VariableFeatures(c1),10)
# c1.plot = VariableFeaturePlot(c1)
# c1.top10.p = LabelPoints(plot = c1.plot, points = c1.10, repel = T, ynudge = 0, xnudge = 0)  
# 
# all.genes = rownames(c1)
# c1 = ScaleData(c1, features = all.genes)
# 
# c1 = RunPCA(object = c1, features = VariableFeatures(object=c1), genes.print = 10)
# ElbowPlot(c1)
# 
# # c1 = JackStraw(c1, num.replicate = 100)
# # c1 = ScoreJackStraw(c1, dims=1:15)
# # JackStrawPlot(c1, dims = 1:15)
# 
# 
# 
# c1 = FindNeighbors(c1, dims = 1:30)
# c1 = FindClusters(c1, resolution = .4)
# c1 = RunUMAP(c1, dims = 1:30, n.components = 3) 
# DimPlot(c1, reduction = "umap", label=T)
# 
# c1 <- RunAzimuth(c1, reference = "bonemarrowref")
# 
# c1$predicted.celltype.l1
# 
# DimPlot(c1, reduction = "umap", group.by = "predicted.celltype.l1", label=T)
# DimPlot(c1, reduction = "umap", group.by = "predicted.celltype.l2", label=T)
# DimPlot(c1, reduction = "umap", group.by = "predicted.celltype.l3", label=T)
# 
# c1@active.ident = as.factor(c1$predicted.celltype.l1)
# 
# c1@assays$RNA$data = c1@assays$prediction.score.celltype.l1$data
# 
# saveRDS(c1, file = "/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c1.rds")
# c1 = readRDS(file = '/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c1.rds')
# 
# ### c2 ####
# c2.dat = Read10X(data.dir = "/samurlab2/RawData/RoswellMouse/RS-04392811_CXCR2_RS-04390275_count/outs/filtered_feature_bc_matrix/")
# c2 = CreateSeuratObject(c2.dat)
# 
# c2@active.assay = "RNA"
# c2[["percent.mt"]] = PercentageFeatureSet(c2, pattern = "^mt-")
# c2[["percent.mt"]]
# 
# VlnPlot(c2, features = c("nFeature_RNA", "nCount_RNA","percent.mt"), ncol = 3, pt.size = 0, cols = RColorBrewer::brewer.pal(n = 3, name = "Spectral"))
# 
# plot1 = FeatureScatter(c2, feature1 = "nCount_RNA", feature2 = "percent.mt")
# plot2 = FeatureScatter(c2, feature1 = "nCount_RNA", feature2 = "nFeature_RNA",group.by = "sample")
# plot1+plot2  
# 
# 
# meta = c2@meta.data 
# meta$cells = rownames(meta)
# 
# meta = meta %>%
#   dplyr::rename(seq_folder = orig.ident,
#                 nUMI = nCount_RNA,
#                 nGene = nFeature_RNA)
# View(meta)
# 
# #number of cells included in sample
# meta %>% 
#   ggplot(aes(x=nGene, fill = seq_folder)) + 
#   geom_bar()+
#   theme_classic()+
#   theme(axis.text.x =  element_text(angle=45, vjust = 1, hjust = 1))+
#   theme(plot.title=element_text(hjust=0.5, face="bold"))+
#   ggtitle("Number of Cells")
# 
# # Visualize the number UMIs/transcripts per cell to search for outliers
# meta %>% 
#   ggplot(aes(color=seq_folder, x=nUMI, fill= seq_folder)) + 
#   geom_density(alpha = 0.2) + 
#   scale_x_log10() + 
#   theme_classic() +
#   ylab("Cell density") +
#   geom_vline(xintercept = 8361.788)
# 
# #visualize the distribution of genes detected per cell via histogram 
# meta %>% 
#   ggplot(aes(color=sample, x=nGene, fill= sample)) + 
#   geom_density(alpha = 0.2) + 
#   theme_classic() +
#   scale_x_log10() + 
#   geom_vline(xintercept = 1184)+
#   scale_x_continuous(limits=c(100,5000))
# 
# 
# c2 = subset(c2, subset = nFeature_RNA>300 & nFeature_RNA<8000 & percent.mt <5)
# c2 = NormalizeData(c2, normalization.method = "LogNormalize", scale.factor = 1e4)
# 
# c2 = FindVariableFeatures(c2, selection.method = "vst", nfeatures = 2000)
# c2.10 = head(VariableFeatures(c2),10)
# c2.plot = VariableFeaturePlot(c2)
# c2.top10.p = LabelPoints(plot = c2.plot, points = c2.10, repel = T, ynudge = 0, xnudge = 0)  
# 
# all.genes = rownames(c2)
# c2 = ScaleData(c2, features = all.genes)
# 
# c2 = RunPCA(object = c2, features = VariableFeatures(object=c2), genes.print = 10)
# ElbowPlot(c2)
# 
# # c2 = JackStraw(c2, num.replicate = 100)
# # c2 = ScoreJackStraw(c2, dims=1:15)
# # JackStrawPlot(c2, dims = 1:15)
# 
# c2 = FindNeighbors(c2, dims = 1:30)
# c2 = FindClusters(c2, resolution = .4)
# c2 = RunUMAP(c2, dims = 1:30, n.components = 3) 
# DimPlot(c2, reduction = "umap", label=T)
# 
# library(Azimuth)
# c2_anno <- RunAzimuth(c2, reference = "bonemarrowref")
# 
# c2_anno$predicted.celltype.l1
# 
# DimPlot(c2_anno, reduction = "umap", group.by = "predicted.celltype.l1", label=T)
# DimPlot(c2_anno, reduction = "umap", group.by = "predicted.celltype.l2", label=T)
# 
# c2_anno@active.ident = as.factor(c2_anno$predicted.celltype.l1)
# 
# saveRDS(c2, file = "/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c2.rds")
# c2_unlabeled = readRDS(file = '/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c2.rds')
# 
# saveRDS(c2_anno, file = "/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c2_anno.rds")
# c2 = readRDS(file = '/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c2_anno.rds')
# 
# 
# ### c3 ####
# c3.dat = Read10X(data.dir = "/samurlab2/RawData/RoswellMouse/RS-04392812_BTZ-Dex-B-D_RS-04390276_count/outs/filtered_feature_bc_matrix/")
# c3 = CreateSeuratObject(c3.dat, assay = "RNA")
# 
# c3[["percent.mt"]] = PercentageFeatureSet(c3, pattern = "^mt-")
# c3[["percent.mt"]]
# 
# 
# VlnPlot(c3, features = c("nFeature_RNA", "nCount_RNA","percent.mt"), ncol = 3, pt.size = 0, cols = RColorBrewer::brewer.pal(n = 3, name = "Spectral"))
# 
# plot1 = FeatureScatter(c3, feature1 = "nCount_RNA", feature2 = "percent.mt")
# plot2 = FeatureScatter(c3, feature1 = "nCount_RNA", feature2 = "nFeature_RNA",group.by = "sample")
# plot1+plot2  
# 
# 
# meta = c3@meta.data 
# meta$cells = rownames(meta)
# 
# meta = meta %>%
#   dplyr::rename(seq_folder = orig.ident,
#                 nUMI = nCount_RNA,
#                 nGene = nFeature_RNA)
# View(meta)
# 
# #number of cells included in sample
# meta %>% 
#   ggplot(aes(x=nGene, fill = seq_folder)) + 
#   geom_bar()+
#   theme_classic()+
#   theme(axis.text.x =  element_text(angle=45, vjust = 1, hjust = 1))+
#   theme(plot.title=element_text(hjust=0.5, face="bold"))+
#   ggtitle("Number of Cells")
# 
# # Visualize the number UMIs/transcripts per cell to search for outliers
# meta %>% 
#   ggplot(aes(color=seq_folder, x=nUMI, fill= seq_folder)) + 
#   geom_density(alpha = 0.2) + 
#   scale_x_log10() + 
#   theme_classic() +
#   ylab("Cell density") +
#   geom_vline(xintercept = 8361.788)
# 
# #visualize the distribution of genes detected per cell via histogram 
# meta %>% 
#   ggplot(aes(color=sample, x=nGene, fill= sample)) + 
#   geom_density(alpha = 0.2) + 
#   theme_classic() +
#   scale_x_log10() + 
#   geom_vline(xintercept = 1184)+
#   scale_x_continuous(limits=c(100,5000))
# 
# 
# c3 = subset(c3, subset = nFeature_RNA>300 & nFeature_RNA<8000 & percent.mt <5)
# c3 = NormalizeData(c3, normalization.method = "LogNormalize", scale.factor = 1e4)
# 
# c3 = FindVariableFeatures(c3, selection.method = "vst", nfeatures = 2000)
# c3.10 = head(VariableFeatures(c3),10)
# c3.plot = VariableFeaturePlot(c3)
# c3.top10.p = LabelPoints(plot = c3.plot, points = c3.10, repel = T, ynudge = 0, xnudge = 0)  
# 
# all.genes = rownames(c3)
# c3 = ScaleData(c3, features = all.genes)
# 
# c3 = RunPCA(object = c3, features = VariableFeatures(object=c3), genes.print = 10)
# ElbowPlot(c3)
# 
# # c3 = JackStraw(c3, num.replicate = 100)
# # c3 = ScoreJackStraw(c3, dims=1:15)
# # JackStrawPlot(c3, dims = 1:15)
# 
# 
# 
# c3 = FindNeighbors(c3, dims = 1:30)
# c3 = FindClusters(c3, resolution = .4)
# c3 = RunUMAP(c3, dims = 1:30, n.components = 3) 
# DimPlot(c3, reduction = "umap", label=T)
# 
# c_anno <- RunAzimuth(c3, reference = "bonemarrowref")
# 
# c_anno$predicted.celltype.l1
# 
# DimPlot(c_anno, reduction = "umap", group.by = "predicted.celltype.l1", label=T)
# DimPlot(c_anno, reduction = "umap", group.by = "predicted.celltype.l2", label=T)
# 
# c_anno@active.ident = as.factor(c_anno$predicted.celltype.l1)
# 
# c3_anno = c_anno
# saveRDS(c3, file = "/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c3.rds")
# c3_unlabeled = readRDS(file = '/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c3.rds')
# 
# saveRDS(c_anno, file = "/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c3_anno.rds")
# c3 = readRDS(file = '/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c3_anno.rds')
# 
# 
# ### c4 ####
# c4.dat = Read10X(data.dir = "/samurlab2/RawData/RoswellMouse/RS-04392813_Triple_RS-04390277_count/outs/filtered_feature_bc_matrix/")
# c4 = CreateSeuratObject(c4.dat, assay = "RNA")
# 
# c4[["percent.mt"]] = PercentageFeatureSet(c4, pattern = "^mt-")
# c4[["percent.mt"]]
# 
# 
# VlnPlot(c4, features = c("nFeature_RNA", "nCount_RNA","percent.mt"), ncol = 3, pt.size = 0, cols = RColorBrewer::brewer.pal(n = 3, name = "Spectral"))
# 
# plot1 = FeatureScatter(c4, feature1 = "nCount_RNA", feature2 = "percent.mt")
# plot2 = FeatureScatter(c4, feature1 = "nCount_RNA", feature2 = "nFeature_RNA",group.by = "sample")
# plot1+plot2  
# 
# 
# meta = c4@meta.data 
# meta$cells = rownames(meta)
# 
# meta = meta %>%
#   dplyr::rename(seq_folder = orig.ident,
#                 nUMI = nCount_RNA,
#                 nGene = nFeature_RNA)
# View(meta)
# 
# #number of cells included in sample
# meta %>% 
#   ggplot(aes(x=nGene, fill = seq_folder)) + 
#   geom_bar()+
#   theme_classic()+
#   theme(axis.text.x =  element_text(angle=45, vjust = 1, hjust = 1))+
#   theme(plot.title=element_text(hjust=0.5, face="bold"))+
#   ggtitle("Number of Cells")
# 
# # Visualize the number UMIs/transcripts per cell to search for outliers
# meta %>% 
#   ggplot(aes(color=seq_folder, x=nUMI, fill= seq_folder)) + 
#   geom_density(alpha = 0.2) + 
#   scale_x_log10() + 
#   theme_classic() +
#   ylab("Cell density") +
#   geom_vline(xintercept = 8361.788)
# 
# #visualize the distribution of genes detected per cell via histogram 
# meta %>% 
#   ggplot(aes(color=sample, x=nGene, fill= sample)) + 
#   geom_density(alpha = 0.2) + 
#   theme_classic() +
#   scale_x_log10() + 
#   geom_vline(xintercept = 1184)+
#   scale_x_continuous(limits=c(100,5000))
# 
# 
# c4 = subset(c4, subset = nFeature_RNA>300 & nFeature_RNA<8000 & percent.mt <5)
# c4 = NormalizeData(c4, normalization.method = "LogNormalize", scale.factor = 1e4)
# 
# c4 = FindVariableFeatures(c4, selection.method = "vst", nfeatures = 2000)
# c4.10 = head(VariableFeatures(c4),10)
# c4.plot = VariableFeaturePlot(c4)
# c4.top10.p = LabelPoints(plot = c4.plot, points = c4.10, repel = T, ynudge = 0, xnudge = 0)  
# 
# all.genes = rownames(c4)
# c4 = ScaleData(c4, features = all.genes)
# 
# c4 <-  RunPCA(object = c4, features = VariableFeatures(object=c4), genes.print = 10)
# ElbowPlot(c4)
# 
# # c4 = JackStraw(c4, num.replicate = 100)
# # c4 = ScoreJackStraw(c4, dims=1:15)
# # JackStrawPlot(c4, dims = 1:15)
# 
# 
# 
# c4 <-  FindNeighbors(c4, dims = 1:30)
# c4 <-  FindClusters(c4, resolution = .4)
# c4 <-  RunUMAP(c4, dims = 1:30, n.components = 3) 
# DimPlot(c4, reduction = "umap", label=T)
# 
# library(Azimuth)
# c4_anno <- RunAzimuth(c4, reference = "bonemarrowref")
# 
# c4_anno$predicted.celltype.l1
# 
# DimPlot(c4_anno, group.by = "predicted.celltype.l1", label=T)
# DimPlot(c4_anno, group.by = "predicted.celltype.l2", label=T)
# 
# c4_anno@active.ident = as.factor(c4_anno$predicted.celltype.l1)
# 
# saveRDS(c4, file = "/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c4.rds")
# c4_unlabeled = readRDS(file = '/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c4.rds')
# 
# saveRDS(c4_anno, file = "/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c4_anno.rds")
# c4 <-  readRDS(file = '/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c4_anno.rds')
c1 <-  readRDS(file = '/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c1.rds')
c2 <-  readRDS(file = '/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c2_anno.rds')
c3 <-  readRDS(file = '/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c3_anno.rds')
c4 <-  readRDS(file = '/samurlab1/Joshua/RDS_obj/scMouse_CXCR2study/c4_anno.rds')

c1$sample = "c1"
c2$sample = "c2"
c3$sample = "c3"
c4$sample = "c4"
merge = merge(x = c1, y = c(c2,c3,c4))
## Warning: Some cell names are duplicated across objects provided. Renaming to
## enforce unique cell names.
merge = JoinLayers(merge)
merge$sample <- as.factor(merge$sample)


VlnPlot(merge, features = c("nFeature_RNA", "nCount_RNA","percent.mt"), ncol = 3, pt.size = 0)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.
## Warning: The `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `PackageCheck()` was deprecated in SeuratObject 5.0.0.
## ℹ Please use `rlang::check_installed()` instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#### statistics ####
merge <-  NormalizeData(merge, normalization.method = "LogNormalize", scale.factor = 1e4)
## Normalizing layer: counts
merge <-  FindVariableFeatures(merge, selection.method = "vst", nFeatures = 3000)
## Finding variable features for layer counts
all.genes <-  rownames(merge)
merge <-  ScaleData(merge, features = all.genes)
## Centering and scaling data matrix
merge <-  RunPCA(merge, features = VariableFeatures(merge), genes.print = 25)
## PC_ 1 
## Positive:  CLEC4D, G0S2, SLC7A11, CAMP, IL1B, FCGR3B, BASP1, ACVRL1, THBS1, CD63 
##     GDPD3, EGR1, INHBA, VAMP5, ISG15, CEBPE, ACOD1, CTSE, MGST2, FGL2 
##     GCA, PTGS2, OLFM4, AGPAT2, STEAP4, SOCS3, IFIT3, OASL, SERPINB1, HMOX1 
## Negative:  RPL10A, RPL3, RPL36A, RPS18, RPS2, RPS17, RPS19, RPL32, RACK1, NME2 
##     RPS5, RPL15, RANBP1, RPL22, RPL13, MT-ND1, SETSIP, RPS28, KRTCAP2, RPS4X 
##     YBX3, SNRPD1, RPS8, RPLP0, RPS6, RPL28, NAP1L1, RPL8, ANP32B, TUBA1B 
## PC_ 2 
## Positive:  SELENOW, FYN, GIMAP6, IL7R, ABLIM1, KLRD1, CD7, GIMAP4, GIMAP1, CHD3 
##     CTSS, CTSW, CD2AP, SKAP1, RNASE6, CD2, KBTBD11, TCF7, THY1, AC008012.1 
##     MRPL52, BCL2, TXK, ITGA4, ZFP36L1, GIMAP8, GIMAP7, CARD11, GIMAP1-GIMAP5, CCR5 
## Negative:  HMGB2, AC087721.2, MKI67, KIF11, PRC1, TPX2, BIRC5, SPC25, PCLAF, KIF15 
##     CDK1, KNL1, CCNA2, CDCA3, CENPE, KIF4A, HMMR, NDC80, SGO2, NUF2 
##     UBE2C, SPC24, PLK1, RRM2, FBXO5, KIF22, ASF1B, DLGAP5, NCAPG, TACC3 
## PC_ 3 
## Positive:  FN1, S100A4, IFI30, CTSS, PLD4, RASSF4, MS4A6A, LY86, PSAP, F13A1 
##     KLF4, CTSC, CD302, LRP1, PID1, MCUB, IRF5, NUPR1, NAAA, CST3 
##     CLEC4C, CX3CR1, CTSB, F10, RND3, RNASE6, OLFM1, PLCB1, CSF1R, PLEKHO1 
## Negative:  NKG7, ADGRG1, AC008012.1, MYB, ANGPT1, MUC13, CD27, NEDD4, RAB38, CST7 
##     GIMAP6, KIT, NRGN, ZFPM1, CD34, ADGRL4, BCL2, PHGDH, NME4, GIMAP1 
##     SOX4, SDSL, SLC22A3, IGFBP4, TSPAN4, IKZF2, CD63, LAT, WDR12, MYCT1 
## PC_ 4 
## Positive:  GIMAP4, GIMAP6, SKAP1, CD2, CTSW, THY1, ABLIM1, TCF7, TXK, IL2RB 
##     GIMAP7, CD3D, GIMAP8, TRBC1, BCL11B, LEF1, GIMAP1-GIMAP5, KLRD1, SH2D1A, CD226 
##     GIMAP1, ITK, IL7R, CD247, RASGRP1, LAT, IKZF3, KBTBD11, CD8B2, CD8A 
## Negative:  CTSG, MPO, SRM, PRTN3, FKBP11, CD34, GSTO1, SYCE2, NME4, SHMT1 
##     RAB38, MUC13, APEX1, IDH2, CST3, DNPH1, RANGRF, TSPAN4, WDR12, ADGRL4 
##     ADGRG1, RAMP1, PLAC8, HACD1, HELLS, TMEM97, CDC6, UNG, LMO2, MCRIP2 
## PC_ 5 
## Positive:  COX6A2, LY6D, CD300C, KMO, TCF4, SDC4, CCND1, AC020909.1, ATP1B1, TSC22D1 
##     PACSIN1, BLNK, UPB1, P2RY14, RUNX2, NET1, RPGRIP1, FCRLA, PAFAH1B3, PDZD4 
##     HS3ST1, CXXC5, CYB561A3, ATP2A1, SRGAP3, PTPRF, CACNA1E, GET1-SH3BGR, LIFR, PLTP 
## Negative:  GIMAP4, IFNGR1, S100A4, HOPX, F13A1, TCF7, TXK, SKAP1, CD3D, FN1 
##     IL2RB, NKG7, CD302, THY1, CD2, GIMAP7, CLEC4C, LRP1, TRBC1, F10 
##     BCL11B, CTSW, LEF1, NUPR1, GIMAP8, SH2D1A, CTSC, MCUB, CD226, CSF1R
ElbowPlot(merge)

merge <- FindNeighbors(merge, dims = 1:50)
## Computing nearest neighbor graph
## Computing SNN
merge <- FindClusters(merge, resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 36805
## Number of edges: 1398308
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9260
## Number of communities: 17
## Elapsed time: 9 seconds
## 1 singletons identified. 16 final clusters.
merge <- RunUMAP(merge, dims = 1:50, n.components = 3)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 16:33:57 UMAP embedding parameters a = 0.9922 b = 1.112
## 16:33:57 Read 36805 rows and found 50 numeric columns
## 16:33:57 Using Annoy for neighbor search, n_neighbors = 30
## 16:33:57 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:34:03 Writing NN index file to temp file /tmp/RtmppDEUZu/file558f870fa0f24
## 16:34:03 Searching Annoy index using 1 thread, search_k = 3000
## 16:34:17 Annoy recall = 100%
## 16:34:19 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 16:34:21 Initializing from normalized Laplacian + noise (using RSpectra)
## 16:34:23 Commencing optimization for 200 epochs, with 1709708 positive edges
## 16:34:23 Using rng type: pcg
## 16:34:40 Optimization finished
merge$active.ident = merge$predicted.celltype.l1
p1 <-  DimPlot(merge, group.by = "sample")
p2 <-  DimPlot(merge, group.by = "active.ident", label = T)
p1+p2

merge@active.ident = as.factor(merge$active.ident)
merge.mono <- subset(merge, subset = predicted.celltype.l2 %in% c("CD14 Mono","CD16 Mono"))
DimPlot(merge.mono, split.by = "sample")

merge.mono <-  NormalizeData(merge.mono, normalization.method = "LogNormalize", scale.factor = 1e4)
## Normalizing layer: counts
merge.mono <-  FindVariableFeatures(merge.mono, selection.method = "vst", nFeatures = 3000)
## Finding variable features for layer counts
all.genes <-  rownames(merge.mono)
merge.mono <-  ScaleData(merge.mono, features = all.genes)
## Centering and scaling data matrix
merge.mono <-  RunPCA(merge.mono, features = VariableFeatures(merge.mono), genes.print = 25)
## PC_ 1 
## Positive:  CLEC4D, SLC7A11, BASP1, LTF, THBS1, RSAD2, CD63, EGR1, ACOD1, FCGR3B 
##     SRGN, GDPD3, MARCKS, PTGS2, OLFM4, HMGB2, ISG15, OASL, TENT5C, HSD11B1 
##     SOCS3, MARCKSL1, MGST2, CSF1, IFIT3, XPC, ADGRG3, IL1RL1, MMP13, TMEM132D 
## Negative:  RPL10A, RPL3, RPS18, NME2, RPL36A, RPL32, RACK1, RPS17, RPS5, RPS28 
##     RPS2, DBI, KRTCAP2, RPL13, MT-ND1, RPL15, AL928654.3, RPS4X, RPLP0, RPS8 
##     RPS15A, RPL19, RPL8, MT-ND2, EEF1A1, RPLP2, RPS6, PRDX1, RPS15, LGALS1 
## PC_ 2 
## Positive:  TOP2A, RRM2, PCLAF, SMC2, MKI67, STMN1, ESCO2, ASF1B, LIG1, CLSPN 
##     FCN1, RRM1, TYMS, CDK1, HMGB2, KIF15, TK1, KNL1, PRC1, TPX2 
##     CCNE2, SPC25, NDC80, SPC24, DTL, CENPH, BIRC5, PBK, SGO2, HMGB3 
## Negative:  CTSS, AHNAK, S100A4, CCR2, LRP1, CLEC4C, MNDA, CD2AP, PSAP, FN1 
##     RNASE6, ZEB2, AL928654.3, IFI30, CX3CR1, CCL15, CLEC4A, CD48, F13A1, CD302 
##     NUPR1, PLEKHO1, IRF5, GNGT2, KLF4, S100A10, SELENOW, MCUB, PTPRO, SLFN5 
## PC_ 3 
## Positive:  GIMAP6, LCK, ABLIM1, SKAP1, CTSW, GIMAP7, CD3E, GIMAP1, IL2RB, THY1 
##     GIMAP8, TCF7, PTPRCAP, BCL11B, GIMAP1-GIMAP5, LAT, TXK, TRBC1, IL7R, AC008012.1 
##     PRKCQ, CD226, CD27, KLRD1, SH2D1A, TESPA1, FAM189B, IKZF3, NKG7, GPR174 
## Negative:  FN1, F13A1, IFI30, C3, F10, CD302, MCUB, CCR2, LRP1, PLCB1 
##     NAAA, MSR1, HPSE, PLAC8, STXBP6, CCL15, ANXA5, TIFAB, KLF4, NAPSA 
##     TREM2, ARHGEF37, S100A4, HEXB, LAMP1, DUSP22, GPX1, PTPRO, OLFM1, NUPR1 
## PC_ 4 
## Positive:  SRGN, CLEC4D, MARCKS, LMNB1, ST8SIA4, FOS, ESCO2, CST3, KIF23, PRC1 
##     TOP2A, SPC25, ZEB2, TMPO, PSAP, PCLAF, NDC80, CALM2, CCNA2, CCNF 
##     RSAD2, NCAPG, PBK, SLFN5, SHISA5, MIS18BP1, SMC2, CDCA2, TUBA1B, NCAPH 
## Negative:  LTF, LTA4H, CEBPE, ZMPSTE24, TMEM216, ORM1, MGST2, CD63, FFAR2, C3 
##     AGPAT2, CD81, ICA1, NDUFV3, PDE4D, PGLS, BAZ1A, FCN1, C1QTNF12, PROM1 
##     CLEC12A, ATP5PD, SPTBN1, AP3S1, HEXB, COX6C, SELENOF, ATP6V0C, COX7B, HSD11B1 
## PC_ 5 
## Positive:  HMMR, ASPM, UBE2C, TPX2, CENPE, DEPDC1, CDC20, PRR11, BIRC5, KIF20A 
##     PRC1, CCNA2, SPC25, CDCA3, KNL1, KIF4A, KIF14, CCNB1, RACGAP1, KIF2C 
##     NUF2, LTF, CCNB2, AURKA, CKAP2, MXD3, CENPA, NDC80, KIF15, CEP55 
## Negative:  CTSG, MPO, SRGN, DMKN, SYCE2, SRM, CST7, PHGDH, HELLS, MCM2 
##     CDC6, MCM6, APEX1, CLEC10A, DTL, ZMYND19, GINS2, TMA16, UNG, TIMELESS 
##     DIO2, NME4, PLPPR3, PSMC3IP, WDR12, C1QBP, DHFR2, METTL1, CHEK1, HACD1
ElbowPlot(merge.mono)

merge.mono <- FindNeighbors(merge.mono, dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
merge.mono <- FindClusters(merge.mono, resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 27089
## Number of edges: 939252
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9020
## Number of communities: 15
## Elapsed time: 5 seconds
merge.mono <- RunUMAP(merge.mono, dims = 1:30, n.components = 3)
## 16:36:14 UMAP embedding parameters a = 0.9922 b = 1.112
## 16:36:14 Read 27089 rows and found 30 numeric columns
## 16:36:14 Using Annoy for neighbor search, n_neighbors = 30
## 16:36:14 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:36:17 Writing NN index file to temp file /tmp/RtmppDEUZu/file558f828f9090c
## 16:36:17 Searching Annoy index using 1 thread, search_k = 3000
## 16:36:26 Annoy recall = 100%
## 16:36:27 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 16:36:29 Initializing from normalized Laplacian + noise (using RSpectra)
## 16:36:30 Commencing optimization for 200 epochs, with 1250694 positive edges
## 16:36:30 Using rng type: pcg
## 16:36:43 Optimization finished
p1 <-  DimPlot(merge.mono, group.by = "sample")
p2 <-  DimPlot(merge.mono, group.by = "predicted.celltype.l2", label = T)
p1+p2

From here I break into the monocytes (neutrophils) and all the other immune cells in the bone marrow microenvironment.

The idea here is to observe the differences caused by CXCR2 inhibitors with BTZ/DEX versus just BTZ/DEX and control.

Just neutrophils

merge.neuts = subset(merge.mono, subset = seurat_clusters %in% c(0,1,2,5,9))

DimPlot(merge.neuts, split.by = "sample")

merge.neuts <-  NormalizeData(merge.neuts, normalization.method = "LogNormalize", scale.factor = 1e4)
## Normalizing layer: counts
merge.neuts <-  FindVariableFeatures(merge.neuts, selection.method = "vst", nFeatures = 3000)
## Finding variable features for layer counts
all.genes <-  rownames(merge.neuts)
merge.neuts <-  ScaleData(merge.neuts, features = all.genes)
## Centering and scaling data matrix
merge.neuts <-  RunPCA(merge.neuts, features = VariableFeatures(merge.neuts), genes.print = 25)
## PC_ 1 
## Positive:  SRGN, CLEC4D, FTH1, STK17B, TPD52, CD300LF, SLPI, MARCKS, CCR1, IL1B 
##     DUSP1, IL36G, CD84, TSPAN13, EMILIN2, RGS2, TXNIP, ST8SIA4, SEPHS2, DHRS9 
##     TPM4, PIM1, RESF1, IFITM3, FOS, ZFP36L2, GPCPD1, GCNT2, NLRP3, GM2A 
## Negative:  CAMP, SERPINB1, CYBB, ANXA1, LCN2, DSTN, GOLIM4, ZMPSTE24, ORM1, ANXA3 
##     ADPGK, ABCA13, TMEM216, C3, INHBA, ALDH2, MGST2, TKT, TECR, FMNL2 
##     SCP2, FCN1, FFAR2, CD63, CD81, RHOU, LBP, ETS1, HEXB, NCOA4 
## PC_ 2 
## Positive:  MMP8, FPR1, ADPGK, LCN2, LIMS3, CAPG, ANXA1, CEACAM8, PRDX5, MAP4K4 
##     DSTN, SRI, VIM, TKT, CYBB, SAMD9L, DHRS9, PNPLA8, ALDH2, KMT5A 
##     ITGAM, NT5E, HOPX, MYL6, NCOA4, CHIA, RASA2, NIN, GOLIM4, PROK2 
## Negative:  IL1B, ITGA4, FCN1, GM2A, MARCKS, ORM1, HSPA5, CD244, PRR5L, EGR1 
##     TSPAN13, TAGLN2, IL13RA1, SLFN5, RPL8, PPP1R15A, CSF1, FOS, PTGS2, ST8SIA4 
##     TMEM216, CLEC6A, ARHGAP5, CTSS, RGS2, GCNT2, XBP1, DUSP1, RPS8, SOCS3 
## PC_ 3 
## Positive:  FCN1, ORM1, TMEM216, MS4A3, RPL35A, RPS17, MT-ND1, TMEM256, KRTCAP2, GATM 
##     SELENOS, UPF3B, DBI, PTGR1, MANF, RPL10A, RPS19, IGFBP4, RPL3, RPL28 
##     SPTSSA, LARP1B, CDCA7L, MT-ND2, PDE4D, RPS28, RPS8, S100A1, TIMM13, MRPL58 
## Negative:  IFIT3, SLFN5, STAT1, IRF7, CMPK2, SP140, PARP14, XAF1, RTP4, OASL 
##     AL353671.1, USP41, SAMD9L, ZBP1, HERC6, OAS3, ISG20, EIF2AK2, IRGM, CTSS 
##     PARP9, MAP4K4, ITGAM, STAT2, MYCBP2, MMP8, PTTG2, BST2, PLAC8, MACF1 
## PC_ 4 
## Positive:  IFIT3, SLFN5, XAF1, OASL, USP41, RTP4, HERC6, IRF7, ZBP1, CMPK2 
##     ISG20, IRGM, OAS3, STAT1, EIF2AK2, BST2, AL353671.1, PARP14, SP140, STAT2 
##     MITD1, IFIT1B, PARP9, PLAC8, CD274, ORM1, CTSS, FCN1, UBE2L6, PTTG2 
## Negative:  ITGAM, NIN, MACF1, MAP4K4, ZNF106, CHD4, TXNIP, ATRX, NABP1, SETX 
##     KCTD12, TPM4, DHRS9, ANKRD33B, RHOB, ZFP36L2, HOOK3, PHACTR2, GPCPD1, STK17B 
##     ITGA4, UTRN, ATP2B1, ALCAM, TPD52, MMP8, EMILIN2, ST8SIA4, CD84, TRPS1 
## PC_ 5 
## Positive:  FCN1, NAMPT, EID1, MOV10, CD84, IL36G, CLEC4D, DHRS9, NIN, SEPHS2 
##     MS4A3, TKT, ORM1, AL603832.3, HOPX, ZNF106, GATM, CAMK1D, FPR1, CDCA7L 
##     AMER2, PTGR1, TRPS1, SELENOP, STK17B, EMILIN2, SRGN, SFMBT2, TNFSF13B, ANKRD33B 
## Negative:  CLEC6A, THBS1, CSF1, GM2A, PPP1R15A, NR4A1, IGFBP6, ITGA4, OTULINL, CD244 
##     TGM2, EGR1, ATF3, CAMP, IL1B, ARHGAP5, PTGS2, IL1RAP, ITGAX, TP53INP2 
##     AC005154.5, STEAP4, GOLIM4, NRP1, CHIA, JUN, ABCA13, PLAC8, PRR5L, INHBA
ElbowPlot(merge.neuts)

merge.neuts <- FindNeighbors(merge.neuts, dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
merge.neuts <- FindClusters(merge.neuts, resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 19796
## Number of edges: 567629
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8487
## Number of communities: 7
## Elapsed time: 2 seconds
## 1 singletons identified. 6 final clusters.
merge.neuts <- RunUMAP(merge.neuts, dims = 1:30, n.components = 3)
## 16:37:55 UMAP embedding parameters a = 0.9922 b = 1.112
## 16:37:55 Read 19796 rows and found 30 numeric columns
## 16:37:55 Using Annoy for neighbor search, n_neighbors = 30
## 16:37:55 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:37:58 Writing NN index file to temp file /tmp/RtmppDEUZu/file558f83f394867
## 16:37:58 Searching Annoy index using 1 thread, search_k = 3000
## 16:38:04 Annoy recall = 100%
## 16:38:05 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 16:38:07 Initializing from normalized Laplacian + noise (using RSpectra)
## 16:38:08 Commencing optimization for 200 epochs, with 926446 positive edges
## 16:38:08 Using rng type: pcg
## 16:38:17 Optimization finished
p1 <-  DimPlot(merge.neuts, group.by = "sample")
p2 <-  DimPlot(merge.neuts, , label = T)
p1+p2

Neutrophil Analysis

Here I am describing the immature and immature neutrophil populations

I am adding the

Pre-neutrophil

&

Mature neutrophil scores to grasp the dynamics of the cell populations we’ve captured

preneu.feats.mus = read.delim(file = "/samurlab1/Joshua/Neutrophil_outs/preneugenes.txt", sep ="")
preneu.feats.mus = preneu.feats.mus$x

musGenes = preneu.feats.mus
mouse_human_genes = read.csv("http://www.informatics.jax.org/downloads/reports/HOM_MouseHumanSequence.rpt",sep="\t")

convert_mouse_to_human <- function(gene_list){
  
  output = c()
  
  for(gene in gene_list){
    class_key = (mouse_human_genes %>% filter(Symbol == gene & Common.Organism.Name=="mouse, laboratory"))[['DB.Class.Key']]
    if(!identical(class_key, integer(0)) ){
      human_genes = (mouse_human_genes %>% filter(DB.Class.Key == class_key & Common.Organism.Name=="human"))[,"Symbol"]
      for(human_gene in human_genes){
        output = append(output,human_gene)
      }
    }
  }
  
  return (output)
}

preneu.homo = convert_mouse_to_human(as.vector(preneu.feats.mus))
merge.neuts = AddModuleScore(merge.neuts, features = list(preneu.homo), name = "preneu.sig")
## Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The following features are not present in the object: SLC25A10,
## ARHGAP19, TRIM59, LDHB, GPR84, RTEL1, CMC2, ZFP41, HMGN2, DTNB, PMF1-BGLAP,
## ELDR, ARHGAP11A, C1orf226, not searching for symbol synonyms
mat_neut_sig = read.table(file = "/samurlab1/Joshua/Neutrophil_outs/mature_neut_sig.txt", sep = "\t")
mat_neut_sig = as.list(mat_neut_sig)
merge.neuts = AddModuleScore(merge.neuts, features = mat_neut_sig, name = "mat_neut_sig")
## Warning: The following features are not present in the object: RETNLG, CCL6,
## PRR13, BTG1, CXCR2, FXYD5, H2-D1, FTL1, MAP1LC3B, CYP4F18, IL1F9, AMICA1,
## IFI27L2A, STFA2L1, CXCL2, GM5483, IFITM1, not searching for symbol synonyms

Score visualization

p1 = DimPlot(merge.neuts, label = T)
p2 = RidgePlot(merge.neuts, features = "preneu.sig1", group.by = "seurat_clusters")
p3 = RidgePlot(merge.neuts, features = "mat_neut_sig1", group.by = "seurat_clusters")
p1+p2+p3
## Picking joint bandwidth of 0.00566
## Picking joint bandwidth of 0.0483

merge.neuts.current.cluster.ids = c(0,1,2,3,4,5)
merge.neuts.new.cluster.ids =c("int mature", "mature","int immature","immature","mature","immature")
names(merge.neuts.new.cluster.ids) = levels(merge.neuts)     
merge.neuts = RenameIdents(merge.neuts, merge.neuts.new.cluster.ids)
DimPlot(merge.neuts, reduction="umap", label=TRUE, pt.size=.75, label.size = 5)

Data visualisation for confidence in data

# Mouse neutrophils
SCpubr::do_FeaturePlot(merge.neuts, features = "MKI67", pt.size = 0.75,plot.title = "MKI67")

SCpubr::do_FeaturePlot(merge.neuts, features = "LTF", pt.size = 0.75, plot.title = "LTF")

SCpubr::do_FeaturePlot(merge.neuts, features = "CAMP", pt.size = 0.75, plot.title = "CAMP")

SCpubr::do_FeaturePlot(merge.neuts, features = "MMP9", pt.size = 0.75, plot.title = "MMP9")

SCpubr::do_FeaturePlot(merge.neuts, features = "FCGR3B", pt.size = 0.75, plot.title = "FCGR3B")

SCpubr::do_FeaturePlot(merge.neuts, features = "VEGFA", pt.size = 0.75, plot.title = "VEGFA")

merge.neuts$active.ident = merge.neuts@active.ident
meta = merge.neuts@meta.data

plot_data <- meta %>%
  group_by(sample, active.ident) %>%
  summarise(count = n(), .groups = "drop") %>%
  group_by(sample) %>%
  mutate(proportion = count / sum(count)) %>%
  mutate(label = paste0(round(proportion * 100, 1), "%"))  


ggplot(plot_data, aes(x = sample, y = proportion, fill = active.ident)) +
  geom_bar(stat = "identity", position = "stack") +
  geom_text(aes(label = label), 
            position = position_stack(vjust = 0.5),  
            size = 3, color = "black") +             
  theme_classic() +
  labs(
    title = "Proportional Representation of Neutrophils",
    x = "Dataset",
    y = "Proportion of Cells",
    fill = "Active Ident"
  ) +
  scale_y_continuous(labels = scales::percent_format()) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5, face = "bold"),
    legend.position = "right"
  )

Other immune populations

merge@active.ident = as.factor(merge$active.ident)
merge.else <- subset(merge, subset = predicted.celltype.l1 %in% c("B","CD4 T","CD8 T","DC","HSPC","NK","other T","other"))
DimPlot(merge.else, split.by = "sample")

merge.else <-  NormalizeData(merge.else, normalization.method = "LogNormalize", scale.factor = 1e4)
## Normalizing layer: counts
merge.else <-  FindVariableFeatures(merge.else, selection.method = "vst", nFeatures = 3000)
## Finding variable features for layer counts
all.genes <-  rownames(merge.else)
merge.else <-  ScaleData(merge.else, features = all.genes)
## Centering and scaling data matrix
merge.else <-  RunPCA(merge.else, features = VariableFeatures(merge.else), genes.print = 25)
## PC_ 1 
## Positive:  EEF1G, PTPRCAP, HSP90AB1, HSPE1, RPS2, NME2, PDCD4, AC008012.1, PPP1R14B, MIF 
##     SLC25A4, DDX21, MRPL52, SRGN, TSPAN13, NCL, FBL, BZW2, C1QBP, PARP1 
##     NOP10, MSI2, LGALS1, S100A10, MEF2C, GIMAP6, CHD3, WDR43, CALR, REXO2 
## Negative:  S100A8, CD177, LCN2, CAMP, ANXA1, C3, LTF, HP, MCEMP1, RFLNB 
##     PGLYRP1, LRG1, FCN1, ITGAM, CKAP4, CHI3L1, PRDX5, PYGL, CLEC5A, ORM1 
##     AC087721.2, INHBA, ADPGK, SYNE1, ARHGAP19-SLIT1, NCAM1, MEGF9, CYBB, GCA, UBE2C 
## PC_ 2 
## Positive:  ATP5IF1, DUT, ETFB, RANBP1, SYCE2, EIF5AL1, CMTM7, MPO, MCM3, DCTPP1 
##     DBI, YBX3, ELANE, CDCA7, DTYMK, GSTO1, PGAM1, SLC25A5, MCM7, PLAC8 
##     IMPDH2, PA2G4, TPI1, SELENOH, IGFBP4, NASP, NHP2, POLA1, RRM1, MCM5 
## Negative:  LTB, ABLIM1, IL7R, GIMAP6, GIMAP4, CD3D, CD3G, SKAP1, LCK, CD3E 
##     KLRD1, TCF7, THY1, IKZF3, GIMAP1, TXK, CTSW, ITK, GIMAP8, LEF1 
##     RASGRP1, BCL11B, KBTBD11, GIMAP1-GIMAP5, IL2RB, PTPRCAP, TRBC1, CD8B2, PRKCQ, SLAMF6 
## PC_ 3 
## Positive:  PSAP, PLD4, CTSS, CTSH, LY86, IFI30, RASSF4, RND3, TPD52, KLF4 
##     CCR2, AHNAK, RNASE6, TIFAB, MS4A6A, IRF5, SCPEP1, CST3, S100A4, FN1 
##     CCDC88A, LGALS1, F13A1, S100A6, LGALS3, CTSC, UPB1, CD2AP, NRP1, TGFBI 
## Negative:  NKG7, CD63, VAMP5, NEDD4, CD81, ADGRG3, CDK6, AC008012.1, ADGRG1, MYB 
##     PDE4D, TSPAN32, DACH1, RGCC, MS4A3, CA2, MUC13, SYNE1, CST7, SRGN 
##     PGLYRP1, ERG, CD27, IGFBP4, KDM5B, SDSL, ZFPM1, SERPINB1, TMEM40, ADGRL4 
## PC_ 4 
## Positive:  TCF4, COX6A2, CYB561A3, RUNX2, NUCB2, TSC22D1, LY6D, KMO, LAIR1, CCND1 
##     SDC4, P2RY14, SMIM5, PLTP, ATP1B1, LIFR, LRP8, AC020909.1, MEF2C, PACSIN1 
##     BLNK, FMNL2, PAQR5, MCTP2, RNASE6, PAFAH1B3, RPGRIP1, PTPRS, UPB1, NET1 
## Negative:  HOPX, EMB, S100A10, S100A4, GIMAP4, CD3D, F13A1, CD3G, AL928654.3, LCK 
##     CD3E, FN1, SKAP1, TCF7, TXK, ARL4C, CCR2, TGFBI, THY1, IFNGR1 
##     IL2RB, SMPDL3A, CTSW, TRBC1, BCL11B, LEF1, CTSC, NAAA, MCUB, GIMAP8 
## PC_ 5 
## Positive:  MGST2, RGCC, ELANE, HSD11B1, CD3D, ABLIM1, LTA4H, KLRD1, CD3G, TCF7 
##     LCK, CHD7, IGHM, GIMAP4, CD3E, CD8B2, LEF1, GATM, CTSW, ALAS1 
##     CD8A, TXK, KBTBD11, CLDN15, SLAMF6, TFRC, GRAMD2B, RFLNB, PDE2A, DMKN 
## Negative:  GATA2, CPA3, CSRP3, SLC18A2, CYP11A1, ITGA2B, HACD4, EMILIN2, STX3, CCL3L3 
##     CSF1, TBC1D4, SLC7A8, RGS1, AQP9, IL1RL1, IL6, RNASE12, CDH1, DAPP1 
##     CCL15, MYO1D, APOE, CCR1, NEDD4, IGFBP7, LILRB3, ADGRG1, IER3, CEBPA
ElbowPlot(merge.else)

merge.else <- FindNeighbors(merge.else, dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
merge.else <- FindClusters(merge.else, resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 10290
## Number of edges: 359982
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9373
## Number of communities: 18
## Elapsed time: 0 seconds
merge.else <- RunUMAP(merge.else, dims = 1:30, n.components = 3)
## 16:39:36 UMAP embedding parameters a = 0.9922 b = 1.112
## 16:39:36 Read 10290 rows and found 30 numeric columns
## 16:39:36 Using Annoy for neighbor search, n_neighbors = 30
## 16:39:36 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:39:38 Writing NN index file to temp file /tmp/RtmppDEUZu/file558f8381bb28e
## 16:39:38 Searching Annoy index using 1 thread, search_k = 3000
## 16:39:40 Annoy recall = 100%
## 16:39:41 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 16:39:43 Initializing from normalized Laplacian + noise (using RSpectra)
## 16:39:43 Commencing optimization for 200 epochs, with 414590 positive edges
## 16:39:43 Using rng type: pcg
## 16:39:48 Optimization finished
p1 <-  DimPlot(merge.else, group.by = "sample")
p2 <-  DimPlot(merge.else, group.by = "predicted.celltype.l2", label = T)
p1+p2

Now to examine other immune populations in mouse bone marrow upon chemotherapy exposure

cell_type_colors <- c(
  "B" = "#e31a1c",          
  "CD4 T" = "#1f78b4",     
  "CD8 T" = "#ffb000",     
  "DC" = "#00441b",              
  "HSPC" = "#1cd3a2",            
  "NK" = "#6a3d9a",               
  "other" = "#b15928",       
  "other T" = "#33a02c"     
)


merge.else$active.ident <- factor(merge.else$active.ident, levels = names(cell_type_colors))
meta = merge.else@meta.data 
plot_data <- meta %>%
  group_by(sample, active.ident) %>%
  summarise(count = n(), .groups = "drop") %>%
  group_by(sample) %>%
  mutate(proportion = count / sum(count)) %>%
  mutate(label = paste0(round(proportion * 100, 1), "%"))  # Create label with 1 decimal place


# Step 2: Plot with text labels
ggplot(plot_data, aes(x = sample, y = proportion, fill = active.ident)) +
  geom_bar(stat = "identity", position = "stack") +
  geom_text(aes(label = label), 
            position = position_stack(vjust = 0.5),  
            size = 3, color = "black") +             
  theme_classic() +
  labs(
    title = "Proportional Representation of Neutrophils",
    x = "Dataset",
    y = "Proportion of Cells",
    fill = "Active Ident"
  ) +
  scale_y_continuous(labels = scales::percent_format()) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5, face = "bold"),
    legend.position = "right"
  )

SCpubr::do_DimPlot(merge.else, group.by = "active.ident",pt.size = 0.2)